package com.ro0tsh3ll.controller;

//import org.apache.commons.lang3.StringUtils;
//import us.codecraft.webmagic.Page;
//import us.codecraft.webmagic.Site;
//import us.codecraft.webmagic.Spider;
//import us.codecraft.webmagic.pipeline.JsonFilePipeline;
//import us.codecraft.webmagic.processor.PageProcessor;
//import us.codecraft.webmagic.processor.example.GithubRepoPageProcessor;
//import us.codecraft.webmagic.selector.Selectable;
//import java.util.Date;
//import java.io.*;
//import java.net.HttpURLConnection;
//import java.net.MalformedURLException;
//import java.net.URL;
//import java.sql.*;
//import java.util.regex.Matcher;
//import java.util.regex.Pattern;
//
///**
// * Created by Administrator on 2014/6/16.
// */
//public class WebMagic implements PageProcessor{
//    // Part 1: crawl configuration for the target site, including encoding, crawl interval, retry count, etc.
//    private Site site = Site.me().setRetryTimes(3).setSleepTime(1000);
//    @Override
//    public void process(Page page) {
//        Selectable val = page.getHtml().xpath("//table/tbody/tr/td/a/text()");
//        page.putField("val", page.getHtml().xpath("//table/tbody/tr/td/a/text()").toString());
//        System.out.println(page.getResultItems().get("val"));
//    }
//
//    @Override
//    public Site getSite() {
//        return null;
//    }
//
//    public static void main(String[] args) {
//        Spider.create(new GithubRepoPageProcessor())
//                .addUrl("https://github.com/justjavac/Google-IPs/blob/master/README.md#google-%E5%85%A8%E7%90%83-ip-%E5%9C%B0%E5%9D%80%E5%BA%93")
//                        // crawl with 5 threads (refers to the .thread(5) call below)
//                .addPipeline(new JsonFilePipeline("D:\\webmagic\\"))
//                .thread(5)
//                        // start the spider
//                .run();
//    }
//}